library(readr)
library(tidyverse)
library(reshape2)
library(corrplot)
library(psych)
library(pheatmap)
library(RColorBrewer)
library(cowplot)
# Chunk options
knitr::opts_chunk$set(echo = TRUE, prompt = TRUE,cache = TRUE)

Read in data

> european <- read_csv("european_recoded.csv")
> australian <- read_csv("australian_recoded.csv")
> 
> all <- merge(european,australian,all = TRUE)

Variables that are interesting but are different between contexts

> demographics_var <- c("Age","Gender","L1","speak.other.L2","study.other.L2","origins","year.studyL2","other5.other.ways","degree","roleL2.degree","study.year","prof","L2.VCE","uni1.year","Context")
> l2School <- "\\.L2school$"
> l2School_variables <- colnames(all)[grep(l2School,colnames(all))]
  • First language
> #table(all$L1,all$Context) # too many levels - needs to be cleaned (ex tot number of languages?)
> table(all$L1,useNA = "always")
## 
##          Afrikaans           Albanian            Burmese 
##                  1                  2                  1 
##          Cantonese            Chinese           Croatian 
##                  4                  7                  1 
##              Dutch            English  English and Dutch 
##                  1                201                  2 
##             German German and English German and Turkish 
##                 77                  2                  1 
##                  I         Indonesian            Italian 
##                  1                  1                 89 
##           Japanese           Mandarin            Persian 
##                  1                  5                  1 
##    Persian (Farsi)           Romanian            Russian 
##                  1                  3                  2 
##             Sindhi             Slovak            Spanish 
##                  1                  1                  3 
##            Turkish          Ukrainian               <NA> 
##                  1                  1                 67
> ggplot(all,aes(x=L1,fill=Context)) + geom_bar() + coord_flip() + ggtitle("First Language") + labs(y="N. of participants",x="")

> #table(all$speak.other.L2,all$Context)
> L2 <- data.frame(Freq=table(all$speak.other.L2)[order(table(all$speak.other.L2),decreasing = TRUE)],
+                  L2=names(table(all$speak.other.L2))[order(table(all$speak.other.L2),decreasing = TRUE)]) # too many levels - needs to be cleaned (ex tot number of languages?)
> head(L2)
##   Freq.Var1 Freq.Freq       L2
## 1        No       171       No
## 2       Yes       143      Yes
## 3    French        13   French
## 4   English         9  English
## 5   Italian         7  Italian
## 6  Japanese         4 Japanese
  • origins
> table(all$origins,useNA = "always")
## 
##   No  Yes <NA> 
##  323   89   66
> table(all$year.studyL2)
## 
##                      0 years                   1- 3 years 
##                           69                           12 
##                    1-3 years                    4-6 years 
##                           11                           61 
## First year of primary school                 Kindergarten 
##                           80                           30 
##             Less than a year            more than 6 years 
##                           27                           49 
##                        Other 
##                           72
> table(all$degree)
## 
##                     BA in Anglistik            BA in Nordamerikastudien 
##                                  43                                   4 
##                                 HUM                             HUM.SCI 
##                                 129                                   6 
##                                  LA      Lingue e letterature straniere 
##                                  36                                  82 
## Lingue, mercati e culture dell'Asia                                  QC 
##                                  13                                   5 
##                                 SCI 
##                                  86
  • study.year in the European context is uni1.year in the Australian context
> all$study.year[is.na(all$study.year)] <- all$uni1.year[is.na(all$study.year)]
> #table(all$study.year)
> all$study.year <- ifelse(all$study.year == "Already graduated after 5 semesters in March 2016, was interested in survery/study, sorry.","6th semester",all$study.year)
> table(all$study.year)
## 
##       1st semester           1st year       2nd semester 
##                 72                255                  5 
##           2nd year       3rd semester           3rd year 
##                 37                  5                 20 
## 3rd year of Master  4th year bachelor       5th semester 
##                  1                  8                  2 
##       6th semester           7th year             Master 
##                  3                  1                  3
  • proficiency
> table(all$prof,useNA = "always")
## 
##           Advanced         Elementary       Intermediate 
##                 78                105                 84 
## Upper-intermediate               <NA> 
##                146                 65
  • Barplot filtered

Filter participants : keep only the ones that meet the inclusion criteria.

> all$study.year[is.na(all$study.year)] <- all$uni1.year[is.na(all$study.year)]
> # Filter only subject that we want to include in the study
> # names(table(all$study.year))[1] = 1st semester"
> 
> filtered <- subset(all, (study.year == "1st year") | (study.year == names(table(all$study.year))[1]) & year.studyL2 != "0 years")
> 
> table(filtered$Context)
## 
##   English in Germany     English in Italy  German in Australia 
##                   71                   91                   89 
## Italian in Australia 
##                   75
> table(all$Context)
## 
##   English in Germany     English in Italy  German in Australia 
##                   96                  113                  146 
## Italian in Australia 
##                  123

Demographics

> all <- filtered
> demographics_var <- c("Age","Gender","L1","speak.other.L2","study.other.L2","origins","year.studyL2","other5.other.ways","degree","roleL2.degree","study.year","prof","L2.VCE","uni1.year","Context")
> l2School <- "\\.L2school$"
> l2School_variables <- colnames(all)[grep(l2School,colnames(all))]
> 
> ggplot(all,aes(x=L1,fill=Context)) + geom_bar() + coord_flip() + ggtitle("First Language") + labs(y="N. of participants",x="") + theme_bw()

> table(all$L1,all$Context)
##                     
##                      English in Germany English in Italy
##   Afrikaans                           0                0
##   Albanian                            0                1
##   Cantonese                           0                0
##   Chinese                             0                2
##   Dutch                               1                0
##   English                             1                0
##   English and Dutch                   0                0
##   German                             64                0
##   German and English                  1                0
##   I                                   0                0
##   Indonesian                          0                0
##   Italian                             0               87
##   Japanese                            0                0
##   Mandarin                            0                0
##   Persian (Farsi)                     0                0
##   Romanian                            0                0
##   Russian                             2                0
##   Sindhi                              0                0
##   Spanish                             1                0
##   Turkish                             1                0
##   Ukrainian                           0                1
##                     
##                      German in Australia Italian in Australia
##   Afrikaans                            1                    0
##   Albanian                             0                    0
##   Cantonese                            2                    0
##   Chinese                              2                    0
##   Dutch                                0                    0
##   English                             75                   73
##   English and Dutch                    2                    0
##   German                               0                    0
##   German and English                   1                    0
##   I                                    0                    1
##   Indonesian                           1                    0
##   Italian                              0                    0
##   Japanese                             1                    0
##   Mandarin                             1                    1
##   Persian (Farsi)                      1                    0
##   Romanian                             1                    0
##   Russian                              0                    0
##   Sindhi                               1                    0
##   Spanish                              0                    0
##   Turkish                              0                    0
##   Ukrainian                            0                    0
  • Filter for L1
> #Filter by L1
> 
> nc <- names(table(all$Context))
> l1_filter <- all[(all$Context == nc[1] & (all$L1 == "German" | all$L1 == "German and English")) | 
+                     (all$Context == nc[2] & (all$L1 == "Italian")) | 
+                     (all$Context == nc[3] & (all$L1 == "English" | all$L1 == "English and Dutch" | all$L1 == "German and English")) |
+                     (all$Context == nc[4] & (all$L1 == "English" | all$L1 == "English and Dutch" | all$L1 == "German and English")),]
> 
> 
> all <- l1_filter
> 
> # subset demographics
> demo <- subset(all,select=c("Resp.ID",demographics_var,l2School_variables))
> 
> # Numeri finali
> table(l1_filter$Context)
## 
##   English in Germany     English in Italy  German in Australia 
##                   65                   87                   78 
## Italian in Australia 
##                   73
  • Filter missing value:
  • Filter participants who didn’t put the degree
  • we don’t care about speak.other.L2 and study.other.L2
> missing_bySample <- rowSums(is.na(demo))
> names(missing_bySample) <- demo$Resp.ID
> missing_byVar <- colSums(is.na(demo))
> names(missing_byVar) <- colnames(demo)
> 
> barplot(missing_bySample)

> d <- data.frame(miss=missing_byVar)
> d$varID <- rownames(d)
> ggplot(data=d,aes(x=varID,y=miss)) + geom_bar(stat="identity") + theme_bw() +theme(axis.text.x = element_text(angle = 45, hjust = 1)) 

> demo_missing <- demo %>% group_by(Context) %>% summarise(roleL2.degree_na = sum(is.na(roleL2.degree)),
+                                                          L2.VCE_na = sum(is.na(L2.VCE)),
+                                                          other5.other.ways_na=sum(is.na(other5.other.ways )),
+                                                          uni1.year_na = sum(is.na(uni1.year)),
+                                                          primary1.L2school_na=sum(is.na(primary1.L2school)),
+                                                          CLS3.L2school_na = sum(is.na(CLS3.L2school)),
+                                                          VSL4.L2school_na=sum(is.na(VSL4.L2school)),
+                                                          degree = sum(is.na(degree)),
+                                                          schooL2country5.L2school_na=sum(is.na(schooL2country5.L2school)))
> 
> # We do not filter for speak.other.L2 or study.other.L2
> 
> #demo[is.na(demo$speak.other.L2),]
> # teniamo
> #demo[is.na(demo$study.other.L2),]
> missing_bySample[names(missing_bySample) == "5166861581"]
## 5166861581 
##         10
> #demo[is.na(demo$year.studyL2),]
> missing_bySample[names(missing_bySample) == "5378798787"]
## 5378798787 
##          3
> # remove NA from degree
> #table(demo$degree,useNA = "always")
> # Remove people
> all <- all[!is.na(all$degree),]
> 
> 
> table(all$Context)
## 
##   English in Germany     English in Italy  German in Australia 
##                   64                   87                   77 
## Italian in Australia 
##                   72
  • Summary demographics

  • to change yearL2.study.richi

> # add numbers on the bar
> ggplot(all,aes(x=Age,fill=Context)) + geom_bar(position="dodge",colour="white")  + theme_bw() + labs(y="N participants")

> # add numbers on the bar
> ggplot(all,aes(x=origins,fill=Context)) + geom_bar(position="dodge",colour="white") + theme_bw() + ggtitle("Origins by context")

> # year study L2
> table(all$year.studyL2,all$other.year.studyL2.richi)
##                               
##                                BILINGUAL FIRST.YEAR.SECONDARY
##   0 years                              0                    0
##   1- 3 years                           0                    0
##   1-3 years                            0                    0
##   4-6 years                            0                    0
##   First year of primary school         0                    0
##   Kindergarten                         0                    0
##   Less than a year                     0                    0
##   more than 6 years                    0                    0
##   Other                                3                    7
##                               
##                                FOURTH YEAR PRIMARY FOURTH.YEAR.PRIMARY
##   0 years                                        0                   0
##   1- 3 years                                     0                   0
##   1-3 years                                      0                   0
##   4-6 years                                      0                   0
##   First year of primary school                   0                   0
##   Kindergarten                                   0                   0
##   Less than a year                               0                   0
##   more than 6 years                              0                   0
##   Other                                          1                   3
##                               
##                                LOWER.SECONDARY PERSONAL
##   0 years                                    0        0
##   1- 3 years                                 0        0
##   1-3 years                                  0        0
##   4-6 years                                  0        0
##   First year of primary school               0        0
##   Kindergarten                               0        0
##   Less than a year                           0        0
##   more than 6 years                          0        0
##   Other                                      4        2
##                               
##                                SECOND.YEAR.ELEMENTARY SECOND.YEAR.PRIMARY
##   0 years                                           0                   0
##   1- 3 years                                        0                   0
##   1-3 years                                         0                   0
##   4-6 years                                         0                   0
##   First year of primary school                      0                   0
##   Kindergarten                                      0                   0
##   Less than a year                                  0                   0
##   more than 6 years                                 0                   0
##   Other                                             1                   1
##                               
##                                SECOND.YEAR.SECONDARY THIRD.YEAR.PRIMARY
##   0 years                                          0                  0
##   1- 3 years                                       0                  0
##   1-3 years                                        0                  0
##   4-6 years                                        0                  0
##   First year of primary school                     0                  0
##   Kindergarten                                     0                  0
##   Less than a year                                 0                  0
##   more than 6 years                                0                  0
##   Other                                            2                 27
> all$year.studyL2 <- ifelse(all$year.studyL2 == "Other",all$other.year.studyL2.richi,all$year.studyL2 )
> 
> # European context
> ggplot(all[all$Context == "English in Germany" | all$Context == "English in Italy",],aes(x=degree,fill=year.studyL2)) + geom_bar(position="dodge",colour="white") + theme_bw() + ggtitle("Degree by study year L2, by Context") +  facet_grid(~Context,scales="free") + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + labs(y = "N participants", x = "degree")

> # Australian context
> ggplot(all[all$Context == "Italian in Australia" | all$Context == "German in Australia",],aes(x=degree,fill=study.year)) + geom_bar(position="dodge",colour="white") + theme_bw() + ggtitle("Degree by study year, by previous study") + coord_flip() + facet_grid(Context~year.studyL2,scales="free")

> all_melt <- melt(all,id.vars = c("Resp.ID","Gender","Age","prof","Context","study.year"),
+                         measure.vars = likert_variables_all)
> 
> all_melt$value <- factor(all_melt$value,levels=c("Strongly disagree","Disagree","Not sure","Agree","Strongly agree"))
> 
> all_melt <- all_melt %>% separate(variable,into=c("item","type"),sep="\\.",remove=FALSE)
## Warning: Too few values at 600 locations: 8701, 8702, 8703, 8704, 8705,
## 8706, 8707, 8708, 8709, 8710, 8711, 8712, 8713, 8714, 8715, 8716, 8717,
## 8718, 8719, 8720, ...
> ggplot(all_melt,aes(x=variable,fill=value)) + geom_bar(position = "stack") + 
+   facet_grid(Context~type,scales = "free")+theme(axis.text.x = element_text(angle = 45, hjust = 1),axis.text=element_text(size=8)) + ggtitle("Filtered dataset")

> filt_sum <- all_melt %>% group_by(Context,variable,type,value) %>% dplyr::summarise(Ngroup=length(value))
> ggplot(filt_sum,aes(x=value,y=Ngroup,colour=Context,group=interaction(variable, Context))) + geom_line() + geom_point() + facet_wrap(~type,scales = "free")+theme(axis.text.x = element_text(angle = 45, hjust = 1))

Convert Likert scales to numbers

> convertToNumber <- function(column){
+   column <- factor(column,levels = c("Strongly disagree","Disagree","Not sure","Agree","Strongly agree"))
+   column_number <- as.numeric(column)
+   return(column_number)
+ }
> 
> table(all$Context)
## 
##   English in Germany     English in Italy  German in Australia 
##                   64                   87                   77 
## Italian in Australia 
##                   72
> convert_likert <- data.frame(apply(subset(all,select=likert_variables_all),2,convertToNumber))
> colnames(convert_likert) <- paste0(colnames(convert_likert),"1")
> 
> likert_variables1 <- paste0(likert_variables_all,"1")
> 
> # join the converted variables to the filtered dataset
> filtered_conv <- cbind(all,convert_likert)
> 
> table(filtered_conv[,likert_variables_all[1]],filtered_conv[,likert_variables1[1]],useNA = "always")
##                 
##                    2   3   4   5 <NA>
##   Agree            0   0 120   0    0
##   Disagree         8   0   0   0    0
##   Not sure         0  27   0   0    0
##   Strongly agree   0   0   0 145    0
##   <NA>             0   0   0   0    0
> cov <- cor(filtered_conv[filtered_conv$Context == "Italian in Australia",likert_variables1[!(likert_variables1 %in% "necessity1")]],method = "pearson",use="pairwise.complete.obs")
> #corrplot(cov,method="color",is.corr = TRUE,order = "hclust",tl.col = "black",mar = c(1, 0,1, 0),tl.cex=0.8)
> 
> library(pheatmap)
> row_infos <- data.frame(Variables=sapply(strsplit(colnames(cov),split="\\."),function(x) x[2]))
> row_infos$Variables <- as.character(row_infos$Variables)
> rownames(row_infos) <- rownames(cov)
> row_infos$Variables[which(is.na(row_infos$Variables))] <- c("educated")
> row_infos <- row_infos[order(row_infos$Variables),,drop=FALSE]
> 
> ann_col_wide <- data.frame(Variable=unique(row_infos$Variables))
> ann_colors_wide <- list(Variables=c(comm1="#bd0026",educated="#b35806", id1="#f6e8c3",instru1="#35978f",integr1="#386cb0",intr1="#ffff99",ought1="grey",post1="black",prof1="pink"))
> 
> #pheatmap(cov, main = "Italian in Australia",annotation_names_row = FALSE,cluster_cols=TRUE,cluster_rows=TRUE,annotation_col = row_infos[,1,drop=FALSE], annotation_row = row_infos[,1,drop=FALSE],  annotation_colors = ann_colors_wide,breaks=seq(-1,1,0.2),col=c("#67001f","#b2182b","#d6604d","#f4a582","#fddbc7","#f7f7f7","#d1e5f0","#92c5de","#4393c3","#2166ac","#053061"),show_colnames = FALSE,width = 7,height = 7)
> ###################
> 
> diag(cov) <- NA
> pheatmap(cov, main = "Italian in Australia",annotation_names_row = FALSE,cluster_cols=TRUE,cluster_rows=TRUE,annotation_col = row_infos[,1,drop=FALSE], annotation_row = row_infos[,1,drop=FALSE]
+ ,  annotation_colors = ann_colors_wide,show_colnames = FALSE,breaks = seq(-0.6,0.7,length.out = 50),width = 7,height = 7,color=colorRampPalette(brewer.pal(n = 7, name = "RdBu"))(50))

> cov <- cor(filtered_conv[filtered_conv$Context == "German in Australia",likert_variables1[!(likert_variables1 %in% "necessity1")]],method = "pearson",use="pairwise.complete.obs")
> 
> row_infos <- data.frame(Variables=sapply(strsplit(colnames(cov),split="\\."),function(x) x[2]))
> row_infos$Variables <- as.character(row_infos$Variables)
> rownames(row_infos) <- rownames(cov)
> row_infos$Variables[which(is.na(row_infos$Variables))] <- c("educated")
> row_infos <- row_infos[order(row_infos$Variables),,drop=FALSE]
> 
> ann_col_wide <- data.frame(Variable=unique(row_infos$Variables))
> ann_colors_wide <- list(Variables=c(comm1="#bd0026",educated="#b35806", id1="#f6e8c3",instru1="#35978f",integr1="#386cb0",intr1="#ffff99",ought1="grey",post1="black",prof1="pink"))
> 
> diag(cov) <- NA
> pheatmap(cov, main = "German in Australia",annotation_names_row = FALSE,cluster_cols=TRUE,cluster_rows=TRUE,annotation_col = row_infos[,1,drop=FALSE], annotation_row = row_infos[,1,drop=FALSE]
+ ,  annotation_colors = ann_colors_wide,show_colnames = FALSE,breaks = seq(-0.6,0.7,length.out = 50),width = 7,height = 7,color=colorRampPalette(brewer.pal(n = 7, name = "RdBu"))(50))

> cov <- cor(filtered_conv[filtered_conv$Context == "English in Germany",likert_variables1[!(likert_variables1 %in% c("reconnect.comm1",    "speakersmelb.comm1","comecloser.comm1","educated1"))]],method = "pearson",use="pairwise.complete.obs")
> 
> row_infos <- data.frame(Variables=sapply(strsplit(colnames(cov),split="\\."),function(x) x[2]))
> row_infos$Variables <- as.character(row_infos$Variables)
> rownames(row_infos) <- rownames(cov)
> row_infos$Variables[which(is.na(row_infos$Variables))] <- c("necessity")
> row_infos <- row_infos[order(row_infos$Variables),,drop=FALSE]
> 
> ann_col_wide <- data.frame(Variable=unique(row_infos$Variables))
> ann_colors_wide <- list(Variables=c(id1="#f6e8c3",necessity="#b35806",instru1="#35978f",integr1="#386cb0",intr1="#ffff99",ought1="grey",post1="black",prof1="pink"))
> 
> diag(cov) <- NA
> pheatmap(cov, main = "English in Germany",annotation_names_row = FALSE,cluster_cols=TRUE,cluster_rows=TRUE,annotation_col = row_infos[,1,drop=FALSE], annotation_row = row_infos[,1,drop=FALSE]
+ ,  annotation_colors = ann_colors_wide,show_colnames = FALSE,breaks = seq(-0.6,0.7,length.out = 50),width = 7,height = 7,color=colorRampPalette(brewer.pal(n = 7, name = "RdBu"))(50))

> cov <- cor(filtered_conv[filtered_conv$Context == "English in Italy",likert_variables1[!(likert_variables1 %in% c("reconnect.comm1","speakersmelb.comm1","comecloser.comm1","educated1"))]],method = "pearson",use="pairwise.complete.obs")
> 
> row_infos <- data.frame(Variables=sapply(strsplit(colnames(cov),split="\\."),function(x) x[2]))
> row_infos$Variables <- as.character(row_infos$Variables)
> rownames(row_infos) <- rownames(cov)
> row_infos$Variables[which(is.na(row_infos$Variables))] <- "necessity"
> row_infos <- row_infos[order(row_infos$Variables),,drop=FALSE]
> 
> ann_col_wide <- data.frame(Variable=unique(row_infos$Variables))
> ann_colors_wide <- list(Variables=c(comm1="#bd0026",necessity="#b35806", id1="#f6e8c3",instru1="#35978f",integr1="#386cb0",intr1="#ffff99",ought1="grey",post1="black",prof1="pink"))
> 
> diag(cov) <- NA
> pheatmap(cov, main = "English in Italy",annotation_names_row = FALSE,cluster_cols=TRUE,cluster_rows=TRUE,annotation_col = row_infos[,1,drop=FALSE], annotation_row = row_infos[,1,drop=FALSE]
+ ,  annotation_colors = ann_colors_wide,show_colnames = FALSE,breaks = seq(-0.6,0.7,length.out = 50),width = 7,height = 7,color=colorRampPalette(brewer.pal(n = 7, name = "RdBu"))(50))

> cov <- cor(filtered_conv[,likert_variables1],method = "pearson",use="pairwise.complete.obs")
> 
> row_infos <- data.frame(Variables=sapply(strsplit(colnames(cov),split="\\."),function(x) x[2]))
> row_infos$Variables <- as.character(row_infos$Variables)
> rownames(row_infos) <- rownames(cov)
> row_infos$Variables[which(is.na(row_infos$Variables))] <- c("necessity","educated")
> row_infos <- row_infos[order(row_infos$Variables),,drop=FALSE]
> 
> ann_col_wide <- data.frame(Variable=unique(row_infos$Variables))
> ann_colors_wide <- list(Variables=c(comm1="#bd0026",educated="orange", id1="#f6e8c3",instru1="#35978f",necessity="#b35806",integr1="#386cb0",intr1="#ffff99",ought1="grey",post1="black",prof1="pink"))
> 
> diag(cov) <- NA
> pheatmap(cov, main = "All Contexts",annotation_names_row = FALSE,cluster_cols=TRUE,cluster_rows=TRUE,annotation_col = row_infos[,1,drop=FALSE], annotation_row = row_infos[,1,drop=FALSE]
+ ,  annotation_colors = ann_colors_wide,show_colnames = FALSE,breaks = seq(-0.6,0.7,length.out = 50),width = 7,height = 7,color=colorRampPalette(brewer.pal(n = 7, name = "RdBu"))(50))

Evaluate internal constiency of known constructs

> sets <- list(id.var=likert_variables1[grep("\\.id1$",likert_variables1)],
+              ought.var=likert_variables1[grep("\\.ought1$",likert_variables1)],
+              intr.var=likert_variables1[grep("\\.intr1$",likert_variables1)],
+              instru.var=likert_variables1[grep("\\.instru1$",likert_variables1)],
+              integr1.var=likert_variables1[grep("\\.integr1$",likert_variables1)],
+              prof.var=likert_variables1[grep("\\.prof1$",likert_variables1)],
+              post.var=likert_variables1[grep("\\.post1$",likert_variables1)],
+              comm.var=likert_variables1[grep("\\.comm1$",likert_variables1)])
>               
> 
> get_alpha <- function(data=filtered_conv[filtered$Context == "Italian in Australia",],
+                       var=sets$id.var){
+   var_alpha <- alpha(data[,var])
+   dataf <- data.frame(alpha=var_alpha$total,
+                     drop = var_alpha$alpha.drop)
+   rownames(dataf) <- rownames(var_alpha$alpha.drop)
+   return(dataf)
+ }
> 
> # "Italian in Australia"
> ita_in_au <- do.call(rbind,lapply(sets,function(x) {
+   get_alpha(data=filtered_conv[filtered$Context == "Italian in Australia",],
+                       var=x)}))
> ita_in_au$var <- sapply(strsplit(rownames(ita_in_au),split="\\."),function(x) x[1]) 
> ita_in_au$var.full <- sapply(strsplit(rownames(ita_in_au),split="\\."),function(x) x[3]) 
> ita_in_au$Context <- "Italian in Australia"
> rownames(ita_in_au) <- NULL
> 
> # "German in Australia"
> germ_in_au <- do.call(rbind,lapply(sets,function(x) {
+   get_alpha(data=filtered_conv[filtered$Context == "German in Australia",],
+                       var=x)}))
## Warning in alpha(data[, var]): Some items were negatively correlated with the total scale and probably 
## should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option
## Some items ( knowledge.instru1 ) were negatively correlated with the total scale and 
## probably should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option
> germ_in_au$var <- sapply(strsplit(rownames(germ_in_au),split="\\."),function(x) x[1]) 
> germ_in_au$var.full <- sapply(strsplit(rownames(germ_in_au),split="\\."),function(x) x[3]) 
> germ_in_au$Context <- "German in Australia"
> rownames(germ_in_au) <- NULL
> 
> # "English in Germany"
> eng_in_germ <- do.call(rbind,lapply(sets[!(names(sets) %in% "comm.var")],function(x) {
+   get_alpha(data=filtered_conv[filtered$Context == "English in Germany",],
+                       var=x)}))
## Warning in alpha(data[, var]): Some items were negatively correlated with the total scale and probably 
## should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option
## Some items ( people.ought1 ) were negatively correlated with the total scale and 
## probably should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option
> # the ones that makes issues
> get_alpha(data=filtered_conv[filtered$Context == "English in Germany",],
+                       var=sets$ought.var)
## Warning in alpha(data[, var]): Some items were negatively correlated with the total scale and probably 
## should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option
## Some items ( people.ought1 ) were negatively correlated with the total scale and 
## probably should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option
##                 alpha.raw_alpha alpha.std.alpha alpha.G6.smc.
## consider.ought1       0.3811598       0.4843351     0.5179651
## people.ought1         0.3811598       0.4843351     0.5179651
## expect.ought1         0.3811598       0.4843351     0.5179651
## fail.ought1           0.3811598       0.4843351     0.5179651
##                 alpha.average_r alpha.S.N alpha.ase alpha.mean  alpha.sd
## consider.ought1       0.1901594 0.9392439  0.124171   2.193662 0.5264388
## people.ought1         0.1901594 0.9392439  0.124171   2.193662 0.5264388
## expect.ought1         0.1901594 0.9392439  0.124171   2.193662 0.5264388
## fail.ought1           0.1901594 0.9392439  0.124171   2.193662 0.5264388
##                 drop.raw_alpha drop.std.alpha drop.G6.smc. drop.average_r
## consider.ought1     0.10658753      0.2600925    0.3357223     0.10488382
## people.ought1       0.66493349      0.6896065    0.6118196     0.42547635
## expect.ought1       0.07746381      0.1203628    0.1691397     0.04362117
## fail.ought1         0.32542493      0.4077507    0.3904111     0.18665644
##                  drop.S.N drop.alpha.se
## consider.ought1 0.3515202    0.19016624
## people.ought1   2.2217171    0.06757193
## expect.ought1   0.1368323    0.18894532
## fail.ought1     0.6884782    0.14195832
> eng_in_germ$var <- sapply(strsplit(rownames(eng_in_germ),split="\\."),function(x) x[1]) 
> eng_in_germ$var.full <- sapply(strsplit(rownames(eng_in_germ),split="\\."),function(x) x[3]) 
> eng_in_germ$Context <- "English in Germany"
> rownames(eng_in_germ) <- NULL
> 
> # "English in Italy"
> eng_in_ita <- do.call(rbind,lapply(sets[!(names(sets) %in% "comm.var")],function(x) {
+   get_alpha(data=filtered_conv[filtered$Context == "English in Italy",],
+                       var=x)}))
> eng_in_ita$var <- sapply(strsplit(rownames(eng_in_ita),split="\\."),function(x) x[1]) 
> eng_in_ita$var.full <- sapply(strsplit(rownames(eng_in_ita),split="\\."),function(x) x[3]) 
> eng_in_ita$Context <- "English in Italy"
> rownames(eng_in_ita) <- NULL
> 
> # combine
> full_alpha <- rbind(eng_in_ita,eng_in_germ,germ_in_au,ita_in_au)
  • Plot alpha by variable
> full_alpha %>% group_by(Context,var) %>% 
+   summarise(st.alpha = unique(alpha.std.alpha),
+             G6=unique(alpha.G6.smc.)) %>%
+   ggplot(.,aes(x=var,y=st.alpha,colour=Context)) + geom_point() + geom_line(aes(group=Context)) + theme_bw()

> all_melt <- all_melt %>% separate(variable,into=c("item","type"),sep="\\.",remove=FALSE)
## Warning: Too few values at 652 locations: 9455, 9456, 9457, 9458, 9459,
## 9460, 9461, 9462, 9463, 9464, 9465, 9466, 9467, 9468, 9469, 9470, 9471,
## 9472, 9473, 9474, ...
> p1=ggplot(all_melt,aes(x=variable,fill=value)) + geom_bar(position = "stack") + 
+   facet_grid(Context~type,scales = "free") + ggtitle("Filtered dataset")+theme(axis.text.x = element_text(angle = 45, hjust = 1),axis.text=element_text(size=8))
> 
> p2=ggplot(full_alpha,aes(x=var.full,y=drop.std.alpha,colour=Context)) + geom_point() + geom_line(aes(group=Context)) + theme_bw() + facet_wrap(~var,scales="free")
> 
> p4=ggplot(full_alpha,aes(x=var.full,y=drop.average_r,colour=Context)) + geom_point() + geom_line(aes(group=Context)) + theme_bw() + facet_wrap(~var,scales="free")
> 
> p3=full_alpha %>% group_by(Context,var) %>% 
+   summarise(st.alpha = unique(alpha.std.alpha),
+             G6=unique(alpha.G6.smc.)) %>%
+   ggplot(.,aes(x=var,y=st.alpha,colour=Context)) + geom_point() + geom_line(aes(group=Context)) + theme(axis.text.x = element_text(angle = 45, hjust = 1),axis.text=element_text(size=8))
> 
> 
> plot_grid(p2,p3,p1,p4,nrow=4)